*******************************************************************************
* 1. Clean HSW baseline data and merge with files for treatment assignment and household baseline

* Load the wave-1 HSW baseline survey
use "$data_input/OWWA_HSW_Wave1.dta", clear

*******************************************************************************

* Initial clean-up. "missings" is a user-written command that is not defined
* in this file; dropvars presumably removes variables that are missing for
* every observation - TODO confirm against the installed version.
missings dropvars, force
compress

* Generic No/Yes value label used for the 0/1 indicators created below
label define l_yesno 0 "No" 1 "Yes", replace

* Report (without stopping) whether rno identifies observations uniquely
duplicates report rno

* Bring in the treatment assignment list and the household baseline.
* Observations found only in the using files are discarded; unmatched HSW
* records are kept.
merge 1:1 rno using "$data_input/7 AIM_assignment.dta",              ///
	keepusing(gift finlit sharing cohort agency interviewdate)       ///
	keep(master match) nogenerate
merge 1:1 rno using "$data_input/OWWA_HH_Wave1", keep(master match) nogenerate

missings dropvars, force

* Three cases have no cohort information: fill it in from the room identifier
replace cohort = 1  if missing(cohort) & h_room_1 == "712"
replace cohort = 11 if missing(cohort) & h_room_1 == "708"
replace cohort = 22 if missing(cohort) & h_room_1 == "MB2"

*******************************************************************************
* Indicator for classes where no registration list was available beforehand

* Build a Stata daily date from the interview date string by appending the
* year 2014 and parsing the result as day-month-year.
generate date = date(interviewdate + "2014", "DMY")
format date %td
tabulate date

* 1 on the listed interview dates, 0 otherwise (including unparsed dates)
generate no_classlist = inlist(date,                                  ///
	td(04june2014),   td(06june2014),   td(27june2014),               ///
	td(03july2014),   td(14july2014),   td(18july2014),               ///
	td(19july2014),   td(30july2014),   td(31july2014),               ///
	td(07august2014), td(08august2014), td(11august2014),             ///
	td(13august2014))


*******************************************************************************

* Recode "gift intervention" - Refused = control (concerns only three observations)
* These three observations were assigned to the control group anyway - probably an interviewer error
recode h_intervention_gift_1 (.b=0)

* Compare with assignment list (overall there are 88 cases with divergence between assignment list and TNS)
* and 4 cases where the assignment information is missing in the TNS data
tab h_intervention_gift_1 gift, nol

* Keep both sources, then blank out gift (.c) wherever the assignment list
* does not match the TNS record
gen gift_assigned = gift
gen gift_tns = h_intervention_gift_1
replace gift = .c if h_intervention_gift_1!=gift

* Dummy for savings module treatment (TNS record); every remaining missing
* value is set to 0
gen finlit_tns = 1 if h_intervention_pasay_1==1 | h_intervention_bacobo_1==1
replace finlit_tns = 0 if h_intervention_pasay_1==3 | h_intervention_bacobo_1==0
replace finlit_tns = 0 if finlit_tns==.c | finlit_tns==.

lab val finlit_tns l_yesno

* Dummy for sharing treatment (KSA only)
gen sharing_tns = 1 if h_intervention_pasay_1==2
replace sharing_tns = 0 if h_intervention_pasay_1==3
* NOTE(review): sharing_tns can only be 1, 0 or "." at this point, so the
* next line changes nothing. Unlike finlit_tns, plain "." values are not set
* to 0 here - confirm whether that asymmetry is intended.
replace sharing_tns = 0 if sharing_tns==.c
replace sharing_tns = 0 if h_country_1==1
* Label the TNS dummy built here (as is done for finlit_tns) and keep the
* label on the assignment-list variable sharing, which was labelled before.
lab val sharing_tns sharing l_yesno

*******************************************************************************
* Correct some inconsistencies in group assignments:
* where the assignment list has no value, fall back on the TNS record
replace finlit=finlit_tns if finlit==.c| finlit==.
replace sharing=sharing_tns if sharing==.c | sharing==.

* For two RNOs the group treatment does not fit the assigned room. The HSW was assigned to a different room in the morning
* Set them to missing
recode finlit sharing (0 1 =.c) if rno==264 | rno==3607

* One room was split in the morning. Create artificial room (66)
replace h_room_1 = "66" if cohort==6 & h_room_1=="11" & finlit==0

lab var gift "Intervention"
* Closing quote was missing on this label
lab var gift_assigned "Intervention (original assignment)"
lab var gift_tns "Intervention (TNS record)"

lab var sharing "Experience sharing"
lab var finlit "Savings module"


* Variables for clustering of standard errors:
* cluster      = room x cohort
* cluster_date = room x interview date
egen cluster = group(h_room_1 cohort)
egen cluster_date = group(h_room_1 h_date_interview)

sort rno

* Drop old individual assignments of the savings reminder
drop h_intervention_savings_reminder_ h_intervention_savings_none_1 h_intervention_savings_ref_1 h_savings_amount h_savings_amount_1

*******************************************************************************
* Controls from baseline


* Recruitment agency dummies
* ag_obs = number of observations per agency. Note that bysort leaves the
* data sorted by agency.
bysort agency: gen ag_obs=_N
gen agency_1 = agency if ag_obs>=30 // Categories for agencies with overall equal or more than 30 DWs. Otherwise bundle them in one group

* Residual groups, one per destination country (h_country_1 codes 1 and 2)
replace agency_1=9998 if agency_1==. & h_country_1==1
replace agency_1=9999 if agency_1==. & h_country_1==2

* Communication
* Combine two small categories (7 into 6); all missing codes form category 8
gen comm_fam_1 = h_co3_talk_family_1
recode comm_fam_1 (7=6) (. .a .b .c=8) 
* Use the full variable name: the abbreviation comm_fam only works while
* variable abbreviation is enabled and no other variable shares the prefix
lab var comm_fam_1 "Communicate with family"

* Treatment by employer
* Sick leave: code 1 becomes 0, code 2 becomes 1; codes 3, 97 and 98 become .c
tabulate h_e23_work_sick_1
generate no_work_sick_1 = h_e23_work_sick_1
recode no_work_sick_1 (3 97 98 = .c) (2 = 1) (1 = 0)
label variable no_work_sick_1 "Not forced to work when sick"

* Food: codes 1-3 become 1, codes 4-5 become 0 (5 is maybe); 97/98 become .c
tabulate h_e21_provide_food_1
generate enoughfood_1 = h_e21_provide_food_1
recode enoughfood_1 (97 98 = .c) (4 5 = 0) (1 2 3 = 1)
label variable enoughfood_1 "Have enough food"

* Know working hours: codes 2 and 3 are collapsed to 0
generate hours_known = h_e14_workhours_1
recode hours_known (2 3 = 0)

* Expected working hours: respondents with hours_known == 0 receive the
* sample mean of the reported hours
generate exp_workhours = h_e14_workhours_exact_1
summarize exp_workhours
local mean_hours = r(mean)
replace exp_workhours = `mean_hours' if hours_known == 0

* Rest day: code 1 stays 1, codes 2-4 become 0; 97/98 become .c
generate restday_1 = h_e18_dayoff_1
recode restday_1 (97 98 = .c) (2 3 4 = 0) (1 = 1)
label variable restday_1 "HSW has a rest day once a week"

* HSW savings target
* Exchange rates taken from http://www.x-rates.com/historical/?from=PHP&amount=1.00&date=2014-07-1
* Saving targets were already converted into US Dollar in the cleaning do-file
* (monthly exchange rates)
generate savings_target_usd = h_f37_amount_savings_target_us__

* Imputed version: flag .c values, cap at 9600 USD (= 400 USD x 24), then
* replace the flagged values by the mean of the capped variable
generate savings_target_mis_1 = (savings_target_usd == .c)
generate i_savings_target_usd = savings_target_usd

replace i_savings_target_usd = 9600 if !missing(i_savings_target_usd) & i_savings_target_usd > 9600
summarize i_savings_target_usd
replace i_savings_target_usd = r(mean) if savings_target_mis_1 == 1
generate l_i_savings_target_usd = log(i_savings_target_usd)

* HH savings target, raw and capped at the same 9600 USD threshold
generate hh_savings_target_usd_1 = hh_ff31_amount_savings_target_us

generate c_hh_savings_target_usd_1 = hh_savings_target_usd_1
replace c_hh_savings_target_usd_1 = 9600 if !missing(c_hh_savings_target_usd_1) & c_hh_savings_target_usd_1 > 9600

*lab var hh_savings_target_php_1 "Household savings target (in PHP)"
label variable hh_savings_target_usd_1 "Household savings target (in USD)"
label variable c_hh_savings_target_usd_1 "Household savings target (capped - in USD)"

* Financial coordination
tab1 h_f43_decide_budget_1 h_f45_plan_remittance_1 h_f46_disagree_remit_1, nolabel

generate discuss_amount_1 = h_f43_decide_budget_1
generate discuss_use_1    = h_f45_plan_remittance_1
generate disagree_use_1   = h_f46_disagree_remit_1

* Flag values above 2 in either discussion item. Missing values compare as
* larger than any number, so they are flagged as well.
generate remit_coord_mis_1 = discuss_amount_1 > 2 | discuss_use_1 > 2

* All missing codes are set to 0.
* NOTE(review): the second recode cannot change anything, because the first
* one has already removed every missing value from disagree_use_1.
recode discuss_amount_1 discuss_use_1 disagree_use_1 (. .a .b .c = 0)
recode disagree_use_1 (. .c = 0) if h_f45_plan_remittance_1 == 0

* Planned remittance channel: 1 if channel code 1, 0 for any other
* non-missing channel
generate remit_bank_1 = (h_f40_remittance_channel_1 == 1) if h_f40_remittance_channel_1 < .
label variable remit_bank_1 "Plans to remit through bank"
label values remit_bank_1 l_yesno

* Subjective well-being
* happy/calm: reverse the 1-5 scale so that 5 = "All of the time"
gen happy_1=h_sw1a_how_happy_1
gen calm_1=h_sw1b_how_calm_1
lab var happy_1 "Happy"
lab var calm_1 "Calm"
recode happy_1 calm_1 (1=5) (2=4) (3=3) (4=2) (5=1) 
* ", replace" (as already used for l_yesno) keeps label define from aborting
* when a label of that name is already present in memory
lab def l_reversecode 5 "All of the time" 4 "Most of the time" 3 "Some of the time" 2 "A little of the time" 1 "None of the time", replace
lab val happy_1 calm_1 l_reversecode

* The remaining items keep their original coding (1 = "All of the time")
gen nervous_1=h_sw1c_how_nervous_1
gen downheart_1=h_sw1d_how_downheart_1
gen pain_1=h_sw1f_how_pain_1
gen down_1=h_sw1e_how_downdumps_1 

*recode nervous_1 downheart_1 pain_1 down_1 (6 7=.c)

lab var nervous_1 "Nervous"
lab var downheart_1 "Down hearted and blue"
lab var pain_1 "Experienced pain"
lab var down_1 "Down"

lab def l_mhcode  1 "All of the time" 2 "Most of the time" 3 "Some of the time" 4 "A little of the time" 5 "None of the time", replace
lab val nervous_1 downheart_1 pain_1 down_1 l_mhcode

* Number of years HSW plans to work abroad (code 99 is treated as missing)
gen workabroad_yrs_1 = h_e2_extension_1  
replace workabroad_yrs_1 = .c if h_e2_extension_1==99
gen workabroad_m2yrs_1 = workabroad_yrs_1>2 & workabroad_yrs_1<. // missing (don't know is coded as zero)

* Impute missing with mean
* r(mean) from summarize is still available after gen, which does not
* overwrite r() results
sum workabroad_yrs_1, meanonly
gen mis_workabroad_yrs_1 = workabroad_yrs_1==.c
replace workabroad_yrs_1 = r(mean) if workabroad_yrs_1==.c

lab var workabroad_yrs_1 "Number of years HSW plans to work abroad"
lab var workabroad_m2yrs_1 "HSW plans to work abroad for more than two years"
lab var mis_workabroad_yrs_1 "Number of planned duration missing"

* HSW thinks it is good for her children to become an OFW
* Scale is reversed (5 = "Strongly Agree"); codes 6, 7 and the extended
* missing codes are pooled into category 6 "No answer"
gen good_child_ofw_1=h_a2e_fam_child_ofw_1
recode good_child_ofw_1 (1=5) (2=4) (3=3) (4=2) (5=1) (6 7 .a .b .c = 6 )
lab def l_good_child_ofw_1  6 "No answer" 5 "Strongly Agree" 4 "Agree" 3 "Neither Agree nor Disagree" 2 "Disagree" 1 "Strongly Disagree", replace
lab val good_child_ofw_1 l_good_child_ofw_1
lab var good_child_ofw_1 "HSW thinks good for children to become OFW"

* No contact for emergencies: 1 only if both agency-contact items equal 0
gen no_em_contact = 0
replace no_em_contact = 1 if  h_e26_agency_ph_1==0 & h_e26_agency_abroad_1==0

* Covariates for balance tests and cluster estimation
* Indicators written as "x==k if x<." are missing when the source is missing;
* those without the if-qualifier are 0 when the source is missing.
gen ksa = h_country_1 == 2
gen age = 2014 - h_d2_birthyear_1
gen married = h_d3_maritalstatus_1==2 if h_d3_maritalstatus_1<.
gen speak_tagalog = h_d8_language_1==1 if h_d8_language_1<.
recode h_d4_education_1 (2 3 = 2) (6 7 = 6), gen(education) // combine some very small categories
gen max_highschool = h_d4_education_1<=5
* NOTE(review): the original condition was "==8 | ==8", i.e. the same test
* twice. Simplified without changing the result; the second term probably
* meant a different education code - confirm against the codebook.
gen college = h_d4_education_1==8
gen child = h_d7a_biochild_1==1 if h_d7a_biochild_1<.
gen worked = h_d9_work_1==1 if h_d9_work_1<.
gen hsw = h_e3_phildh_1==1 if h_e3_phildh_1<.
replace hsw=0 if h_e3_phildh_1==.c
gen born_pangasinan = h_m2_birthplace_prov_1==15500000 if h_m2_birthplace_prov_1<.
gen born_ncr = h_m2_birthplace_prov_1==130000000 if h_m2_birthplace_prov_1<.
gen nointernet = h_c3_hswinternet_1==8
gen pers_sav = h_f26_have_savings_1==1 if h_f26_have_savings_1<.
gen account = h_f22_bank_acct_ph_1==1 if h_f22_bank_acct_ph_1<.
replace account = 0 if h_f22_bank_acct_ph_1==.c
* NOTE(review): the qualifier here is "<.c", not "<." as elsewhere, so
* observations coded ".", ".a" or ".b" get 0 instead of missing - confirm
* this is intended.
gen dest_Riyadh = h_c10_ksacity_1 == 1 if h_c10_ksacity_1<.c
gen sal_deduct = h_e9a_deduct_1==1 if h_e9a_deduct_1<.
gen talk_employer = h_e10_talked_employer_1==1 if h_e10_talked_employer_1<.
replace talk_employer=0 if h_e10_talked_employer_1==.c
gen contact_dest = h_roster_rel_ksa_hk_1==1 if h_roster_rel_ksa_hk_1<.
gen knows_lang = h_k4_language_1==1 if h_k4_language_1<.
gen roaming = h_c9_roaming_1==1
* 1 if any of the listed baseline items is coded .c
gen baseline_mis = h_f22_bank_acct_ph_1==.c | h_e3_phildh_1==.c | h_e10_talked_employer_1==.c | h_c3_hswinternet_1==.c | h_d4_education_1==.c

lab var age "Age"
lab var ksa "Destination country is KSA (0/1)"
lab var married "Married (0/1)"
lab var child "Has children (0/1)"
lab var speak_tagalog "Speaks Tagalog at home (0/1)"
lab var max_highschool "Highschool degree or less (0/1)"
lab var college "College degree (0/1)"
* Opening parenthesis was missing in this label
lab var worked "Worked 6 months ago (0/1)"
lab var hsw "Worked as domestic helper in PH (0/1)"
lab var born_pangasinan "Born in Pangasinan province (0/1)"
lab var born_ncr "Born in National Capital Region (0/1)"
lab var nointernet "Does not use internet (0/1)"
lab var pers_sav "Has personal savings (0/1)"
lab var account "Has personal bank account (0/1)"
lab var sal_deduct "Salary deduction (0/1)"
lab var talk_employer "Has talked to employer (0/1)"
lab var contact_dest "Knows someone at destination (0/1)"
lab var knows_lang "Knows the name of the language spoken at destination (0/1)"
lab var dest_Riyadh "Destination city is Riyadh (0/1)"
lab var roaming "Plans to use roaming (0/1)"
lab var baseline_mis "Baseline covariate missing"

* Covariate groups (not referenced again in the visible part of this file)
local gen_info "married child speak_tagalog"
local educ "max_highschool college"
local employ "worked hsw"
local location "born_pangasinan born_ncr"
local comm "nointernet roaming"
local fin "pers_sav account discuss_amount_1 discuss_use_1 disagree_use_1 "
local emp_abroad "sal_deduct talk_employer contact_dest knows_lang dest_Riyadh"
local treatment "no_work_sick_1 enoughfood_1"

* Working copies of the six well-being items for imputation
* (created in the same order as they enter the imputation model)
local wb_items happy calm nervous downheart pain down
local im_vars
foreach v of local wb_items {
	generate `v'_im_1 = `v'_1
	local im_vars `im_vars' `v'_im_1
}

* Extended missing codes become system missing in the working copies
recode `im_vars' (.a .b .c = .)

* Impute missings for wellbeing variables: one chained ordered-logit
* imputation in wide style.
* NOTE(review): no rseed() is given, so the imputed values depend on the
* random-number state at this point - confirm a seed is set by the caller.
mi set wide
mi register imputed `im_vars'
mi impute chained (ologit) `im_vars' = account child max_highschool college age nointernet ksa baseline_mis, add(1) noisily

* Rename the imputed copies (_1_<item>_im_1) to i_<item>_1
foreach v of local wb_items {
	rename _1_`v'_im_1 i_`v'_1
}

label variable i_happy_1 "Happy (imputed)"
label variable i_calm_1 "Calm (imputed)"
label variable i_nervous_1 "Nervous (imputed)"
label variable i_downheart_1 "Down hearted and blue (imputed)"
label variable i_pain_1 "Experienced pain (imputed)"
label variable i_down_1 "Down in the dumps (imputed)"

* Index = plain sum of the six imputed items
generate mental_health_1 = i_nervous_1 + i_downheart_1 + i_pain_1 + i_down_1 + i_calm_1 + i_happy_1
label variable mental_health_1 "Mental health index"

*******************************************************************************
* Variables from household data

* Recode possessions to binary variables (1 if the count is above zero,
* missing if the count is missing).
* NOTE(review): capture hides any error from gen; if an item variable is not
* created, the label commands below fail instead - confirm the intent.
foreach item in car motor washing_machine aircon computer gasrange refrigerator landline cp tv dvd {
	capture gen b_`item'_1 = hh_hhe11_`item'_1>0 if hh_hhe11_`item'_1<.
}

lab var b_car_1 "HH owns car"
lab var b_motor_1 "HH owns motorcycle"
lab var b_washing_machine_1 "HH owns washingmachine"
lab var b_aircon_1 "HH owns aircondition"
lab var b_computer_1 "HH owns computer"
lab var b_gasrange_1 "HH owns gasrange"
lab var b_refrigerator_1 "HH owns refrigerator"
lab var b_landline_1 "HH owns landline"
lab var b_cp_1 "HH owns mobile phone"
lab var b_tv_1 "HH owns TV"
lab var b_dvd_1 "HH owns DVD player"

lab val b_car_1 b_motor_1 b_washing_machine_1 b_aircon_1 b_computer_1 b_gasrange_1 b_refrigerator_1 b_landline_1 b_cp_1 b_tv_1 b_dvd_1 l_yesno

* 1 if h_e5_foundjob_1 equals 2, otherwise 0 (also when it is missing)
gen net_ref = h_e5_foundjob_1==2
lab var net_ref "Job from personal network"
lab val net_ref l_yesno

*******************************************************************************

* Gen variable for household size: number of non-missing member ids;
* a count of zero is treated as unknown
egen household_size_1 =rownonmiss(h_hhid_mem*_1)
* Full variable name instead of the abbreviation household_size_, which
* depends on variable abbreviation and on no other variable sharing the prefix
recode household_size_1 (0=.)
label var household_size_1 "Household size"

*******************************************************************************


* Replace system missing (.) with .c in every numeric variable, then save
qui ds, has(type numeric)
* Copy the macro directly rather than evaluating it with "=": evaluation
* treats the variable list as a string expression, which can truncate long
* lists in older Stata versions
local numvars `r(varlist)'
recode `numvars' (.=.c) 

save "$data_prep/owwa_hsw_baseline_edited", replace





